import requests  # HTTP client for fetching pages
from bs4 import BeautifulSoup  # HTML parsing
import pandas as pd  # data export
import time  # request throttling

# Scrape quote text + author from the first 5 pages of quotes.toscrape.com
# and save the result as an Excel file.
base_url = 'https://quotes.toscrape.com/page/{}/'  # example site with pagination
all_quotes = []

# Reuse one HTTP session (keep-alive) instead of opening a fresh TCP
# connection for every page.
session = requests.Session()

for page in range(1, 6):  # scrape the first 5 pages
    url = base_url.format(page)
    try:
        # Keep the try body minimal: only the network call belongs here.
        # Previously parsing, appending and sleeping were all inside the try,
        # so any parse error was mislabeled as a page fetch failure.
        response = session.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        # Narrow exception: catch only network/HTTP errors, not programming bugs.
        print(f'第{page}页爬取失败：{e}')
        continue

    soup = BeautifulSoup(response.text, 'html.parser')
    for q in soup.select('.quote'):
        text_el = q.select_one('.text')
        author_el = q.select_one('.author')
        if text_el is None or author_el is None:
            # Skip a malformed entry rather than raising AttributeError,
            # which used to abort the remaining quotes on the page.
            continue
        all_quotes.append({
            '内容': text_el.text.strip(),
            '作者': author_el.text.strip(),
        })
    print(f'第{page}页爬取完成')
    time.sleep(1)  # throttle: be polite to the server between pages

# Save results to Excel (requires an Excel writer engine such as openpyxl).
pd.DataFrame(all_quotes).to_excel('quotes.xlsx', index=False)
print('所有数据已保存到 quotes.xlsx')